\documentclass[t,12pt,aspectratio=169]{beamer} % 16:9 宽屏比例，适合现代投影
\usepackage{ctex} % 中文支持
\usepackage{amsmath, amssymb} % 数学公式与符号
\usepackage{graphicx}
\usepackage{url}
\usepackage{verbatim}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 插入代码
\usepackage{listings}
\usepackage{color}

% 设置列表的样式
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}

\lstdefinestyle{mystyle}{
    backgroundcolor=\color{backcolour},   
    commentstyle=\color{codegreen},
    keywordstyle=\color{magenta},
    numberstyle=\tiny\color{codegray},
    stringstyle=\color{codepurple},
    basicstyle=\ttfamily\footnotesize,
    breakatwhitespace=false,         
    breaklines=true,                 
    captionpos=b,                    
    keepspaces=true,                 
    numbers=left,                    
    numbersep=5pt,                  
    showspaces=false,                
    showstringspaces=false,
    showtabs=false,                  
    tabsize=2
}

\lstset{style=mystyle}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 主题设置（推荐简洁风格）
\usetheme{Madrid}
\usecolortheme{default} % 可选：seahorse, beaver, dolphin 等

\title{R语言统计入门第6章：回归与相关性 }
\author{PD ET AL}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\begin{frame}
  \titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{目录 Regression and correlation}

\begin{enumerate}
\item[6.1.] 简单线性回归 Simple linear regression 
\item[6.2.] 残差与回归值 Residuals and fitted values 
\item[6.3.] 预测带与置信带 Prediction and confidence bands 
\item[6.4.] 相关性 Correlation 
\item[6.5.] 书中习题 Exercises
\item[6.6.] 单项选择题
\item[6.7.] 简答题 
\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{课程讲解重点难点 }

\begin{enumerate}

%\item  理解线性回归模型的基本知识，使用lm函数进行一元线性回归，解释输出的结果。
%\item 学会计算三种相关系数。

%\item  一元线性回归：模型方程，参数，显著性检验，预测带，置信带
%\item 三种相关系数的计算

\item  一元线性回归模型的基本知识，系数的估计，检验系数是否显著不为零。
\item  lm函数的输入与输出的解读，残差值与回归值。
\item  置信带与预测带的概念，计算与绘图。
\item  Pearson相关系数，Spearman相关系数，和Kendall相关系数的计算。

\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.1.a. 简单线性回归 Simple linear regression}

\begin{itemize}

\item  {\color{red}问题：载入数据框 \verb+thuesen+, 如何用 \verb+blood.glucose+ 来拟合 \verb+short.velocity+ ? }

\item 解答：使用一元线性回归的模型公式。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> library(ISwR)
> attach(thuesen)
> lm01 <- lm(short.velocity ~ blood.glucose)
> summary(lm01)
\end{lstlisting}

\item  {\color{red}问题：从上述程序的运行结果，写出回归方程。}

\item 解答：记 \,{\color{blue}\verb+y = short.velocity+} 以及 \,{\color{blue}\verb+x = blood.glucose+}, 回归方程为
\[ \hat{y} = 1.09781 + 0.02196 x. \]

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.1.b. 回归模型的 summary }

\begin{itemize}
\item  {\color{red}问题：解释这个回归模型的输出结果。}

\item 解答：
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
Call: lm(formula = short.velocity ~ blood.glucose)
Residuals:
     Min       1Q   Median       3Q      Max 
-0.40141 -0.14760 -0.02202  0.03001  0.43490 
Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)    1.09781    0.11748   9.345 6.26e-09 ***
blood.glucose  0.02196    0.01045   2.101   0.0479 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.2167 on 21 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.1737,	Adjusted R-squared:  0.1343 
F-statistic: 4.414 on 1 and 21 DF,  p-value: 0.0479
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.1.c. 回归模型的 summary 的解释 }

%\begin{itemize}
%\item 读出这个回归模型的模型公式。
%\item 显示残差，以及残差的四分位数。
%\item 读出回归方程的截距项和系数项，并确定是否显著。
%\item 读出回归方程的R方和调整后的R方。
%\item 读出回归方差的p值。这个回归模型是否显著？
%\end{itemize}

\begin{itemize}
\item Call: 把回归模型和数据重述一遍。
\item Residuals: 残差的五数概括（最小值、下四分位数、中位数、上四分位数、最大值）。
\item Coefficients: 两个回归参数的估计值、标准误、$t$统计值、$p$值。
\item Intercept: 截距项。
\item Signif. codes: 显著性的记号，例如三颗星表示$p$值小于0.001.
\item Residual standard error: 误差项的标准差的估计值。
\item Multiple R-squared: $R^2$ 统计值。
\item F-statistic: $F$统计值。
\item DF: degrees of freedom, 自由度。
\item p-value: 模型的$p$值。
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.1.d. 画出回归直线 }

\begin{itemize}
\item  {\color{red}问题：在上一个例子中，画出散点图与回归直线。}

\item 解答：
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR)
> # attach(thuesen)
> y <- short.velocity[-16]
> x <- blood.glucose[-16]
> lm01 <- lm(y~x)
> plot(x,y,ylim=c(0,3))
> abline(lm01)
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.1.e. 画出散点图与回归直线 }

 \begin{figure}
 \centering
 \includegraphics[height=0.8\textheight, width=0.8\textwidth]{short-velocity-2.png}
 %\caption{short.velocity explained by blood.glucose}
 \end{figure}
     


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.1.f. 一元线性回归模型(茆诗松书第8.4节)}

\begin{itemize}
\item  {\color{red}问题：什么是一元线性回归模型？}

\item 解答：
\begin{enumerate}
\item  设有自变量 $x$ 和因变量 $y$ 的观测数据 $x_1,\cdots, x_n$ 和 $y_1,\cdots, y_n$. 

\item  在知道 $x$ 的值后，$y$ 的值是一个随机变量 $Y$, 其均值写成 $x$ 的函数为 
\[ f(x)=E(Y|x)=\int_{-\infty}^{\infty} y\,p(y|x)\,dy. \] 
这称为 $y$ 关于 $x$ 的回归函数。

\item  一元回归方程的一般形式是 $y=f(x)+\varepsilon$. 

\item  一元线性回归方程的一般形式是 $y=\beta_0 + \beta_1 x + \varepsilon$. 

\item  这个回归模型里的 $\varepsilon$ 称为误差。
\end{enumerate}

%\item 解释预测带和置信带的区别。

\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.1.g. }

\begin{itemize}

\item  {\color{red}问题：设有自变量 $x$ 和因变量 $y$ 的观测数据 $x_1,\cdots, x_n$ 和 $y_1,\cdots, y_n$. 计算一些基本统计量。}

\item  解答：一些基本统计量的记号和计算公式如下，

\begin{eqnarray*}
&{\color{red}\bar{x}}=\frac{1}{n}\sum\limits_{i=1}^{n}x_i, \hspace{0.3cm}
{\color{red}\bar{y}}=\frac{1}{n}\sum\limits_{i=1}^{n}y_i, \\
&{\color{red}L_{xx}}=\sum\limits_{i=1}^{n}(x_i-\bar{x})^2=\sum\limits_{i=1}^{n}x_i^2-n(\bar{x})^2, \hspace{0.3cm}
{\color{red}L_{yy}}=\sum\limits_{i=1}^{n}(y_i-\bar{y})^2=\sum\limits_{i=1}^{n}y_i^2-n(\bar{y})^2, \\
&{\color{red}L_{xy}}=\sum\limits_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y})=\sum\limits_{i=1}^{n}x_iy_i-n\bar{x}\bar{y}. 
\end{eqnarray*}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.1.h. 一元线性回归模型 }

\begin{itemize}

\item  {\color{red}问题：一元线性回归模型的形式如下，如何估计该模型的参数？
\begin{eqnarray*}
y_i=\beta_0+\beta_1 x_i+\varepsilon_i, \,\,\, \varepsilon_i\overset{iid}{\sim} N(0,\sigma^2). 
\end{eqnarray*} 
}

\item  解答：参数的最小二乘估计使残差的平方和最小，
\begin{eqnarray*}
(\hat{\beta}_0,\hat{\beta}_1) = \underset{\beta_0,\beta_1}{\text{argmin}}\,\, \sum\limits_{i=1}^{n} \left( y_i - \beta_0 - \beta_1 x_i \right)^2. 
\end{eqnarray*}

由此求得回归系数和误差项的方差的估计为
\begin{eqnarray*}
\hat{\beta}_1 = \frac{L_{xy}}{L_{xx}},  \hspace{0.5cm}
\hat{\beta}_0 = \bar{y} - \bar{x}\hat{\beta}_1, \hspace{0.5cm} %\frac{\bar{y}L_{xx} - \bar{x}L_{xy}}{L_{xx}}, 
\hat{\sigma}^2 = \frac{1}{n-2}\sum\limits_{i=1}^{n} (y_i-\hat{y}_i)^2. 
\end{eqnarray*}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.1.i. }

\begin{itemize}

\item  {\color{red}问题：什么是回归值？什么是残差？}

\item  解答：记模型参数 $\beta_0$ 与 $\beta_1$ 的估计为 $\hat{\beta}_0$ 与 $\hat{\beta}_1$, 则 

\begin{enumerate}

\item 称 $\hat{y}_i=\hat{\beta}_0 + \hat{\beta}_1 x_i$ 为 $y_i$ 的回归值或拟合值。

\item 称 $e_i=y_i-\hat{y}_i$ 为 $y_i$ 的残差。

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.1.j. }

\begin{itemize}

\item  {\color{red}问题：如果 $\beta_1$ 的真实值不为零，则称该模型是显著的。如何检验假设 
\begin{eqnarray*}
H_0: \beta_1 = 0, \,\, \mathrm{v.s.} \,\,  H_1: \beta_1 \neq 0 \,\,? 
\end{eqnarray*}
}

\vspace{-0.5cm}

\item 解答：
%考虑统计量 $t=\frac{\hat{\beta}}{s.e.(\hat\beta)}$.
\begin{enumerate}

\item  在正态误差的假设下，估计量 $\hat{\beta}_1$ 服从正态分布 \(\hat{\beta}_1 \sim N(\beta_1,\sigma^2/L_{xx}) . \)

\item  当 $H_0$ 为真时，$\hat{\beta}_1\sim N(0, \sigma^2/L_{xx})$. 因此构造统计量，并求出其分布
\[ T= \frac{\hat{\beta}_1}{\sqrt{\hat{\sigma}^2/L_{xx}}} = \frac{\hat{\beta}_1}{\hat{\sigma}/\sqrt{L_{xx} } } \sim t(n-2). \] 

\item  代入样本数据，当 $T$ 统计量的统计值 $t$ 的绝对值 $|t|$ 很大时，例如 $|t|\ge t_{0.025}(n-2)$（取显著性水平 $\alpha=0.05$）, 就拒绝零假设，认为参数 $\beta_1$ 显著不等于零，即自变量 $x$ 对因变量 $y$ 的影响是显著的。

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.1.k. }

\begin{itemize}

\item  {\color{red}问题：写出统计量 $R^2$ 和 $R^2_{\mathrm{adj}}$ 的计算公式。它们的作用是什么？}

\item 解答：
\begin{enumerate}
\item 决定系数用来衡量模型的拟合优度，即总离差平方和中被回归解释的比例：$$R^2 = \frac{SSR}{SST}. $$
\item （一元线性回归模型的）校正的决定系数定义为 $$R^2_{\mathrm{adj}} = 1 - \frac{n-1}{n-2}(1-R^2).$$
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.1.L. 回归平方和、残差平方和、总离差平方和 }

\begin{itemize}
\item  {\color{red}问题：因变量数据的这三个平方和分别是怎么定义的？}

\item 解答：记 $\bar{y}=(y_1+y_2+\cdots+y_n)/n$ 是因变量数据的均值。 
\begin{eqnarray*}
SSR &=& (\hat{y}_1-\bar{y})^2+(\hat{y}_2-\bar{y})^2+\cdots+(\hat{y}_n-\bar{y})^2, \\
SSE &=& (y_1-\hat{y}_1)^2+(y_2-\hat{y}_2)^2+\cdots+(y_n-\hat{y}_n)^2, \\ 
SST &=& (y_1-\bar{y})^2+(y_2-\bar{y})^2+\cdots+(y_n-\bar{y})^2.
\end{eqnarray*}

\item  {\color{red}问题：这三个平方和之间有什么联系？}

\item 解答：回归平方和 + 残差平方和 = 总离差平方和，即
\begin{eqnarray*}
SSR + SSE = SST.
\end{eqnarray*}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.1.m. 设计矩阵的概念 }

\begin{itemize}
\item  {\color{red}问题：什么是设计矩阵？什么是帽子矩阵？}

\item 解答：设计矩阵是自变量的数据组成的矩阵，行数为观测数，列数为解释变量个数（等于自变量个数或自变量个数加1）。帽子矩阵乘以因变量的观测值就得到因变量的回归值。设计矩阵乘以参数估计值也等于因变量的回归值。
{\footnotesize 
\begin{eqnarray*}
X=\begin{bmatrix}
1 & x_1 \\
1 & x_2 \\
\vdots & \vdots \\
1 & x_n
\end{bmatrix},
\hspace{0.5cm}
{\bf y}=\begin{bmatrix} y_{1} \\ y_{2} \\ \vdots \\ y_{n}  \end{bmatrix},  
\hspace{0.5cm}
\boxed{H=X(X^TX)^{-1}X^T}, 
\hspace{0.5cm}
\hat{\bf y} = H{\bf \, y} = X\hat\beta. 
\end{eqnarray*}
}

\item  回归模型 $y=\beta_0 + \beta_1 x+\varepsilon$ 的参数 $\beta=(\beta_0,\beta_1)^T$ 的最小二乘估计可以写成 
{\footnotesize 
$$\hat\beta=(\hat\beta_0,\hat\beta_1)^T=(X^TX)^{-1}X^T{\bf y}. $$
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.2.a. 残差值与拟合值 Residuals and fitted values}

\begin{itemize}
\item  {\color{red}问题：载入数据框 \verb+thuesen+, 建立 \verb+short.velocity+ 关于 \verb+blood.glucose+ 的回归模型。求出残差值和拟合值。}

\item 解答：首先使用 \,{\color{blue}\verb+lm()+} 函数得出回归模型，然后用 \,{\color{blue}\verb+fitted()+} 函数和  \,{\color{blue}\verb+resid()+} 函数提取拟合值和残差值。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> library(ISwR)
> attach(thuesen)
> lm01 <- lm(short.velocity~blood.glucose)
> summary(lm01)
> fitted(lm01)
> resid(lm01)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.2.b. 因变量、自变量的观测值与回归直线 }

\begin{itemize}
\item  {\color{red}问题：画出散点图、拟合值和残差值的示意图。}

\item 解答：要去掉数据中的缺失部分。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> lm01 <- lm(short.velocity~blood.glucose)
> observed <- !is.na(short.velocity)
> x<-blood.glucose[observed]
> y<-short.velocity[observed]
> yhat<-fitted(lm01)
> plot(x,y,ylim=c(0,3))
> points(x,yhat,col='red')
> lines(x,yhat)
> segments(x,y,x,yhat)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.2.c. 因变量、自变量的观测值与回归直线的图像 }

 \begin{figure}
 \centering
 \includegraphics[height=0.6\textheight, width=0.8\textwidth]{short-velocity-4.png}
 \caption{residuals and fitted values }
 \end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.2.d. 其它方法处理缺失数据}

\begin{itemize}
\item  {\color{red}问题：找出数据完整的所有观测。}

\item 解答一：使用 \,{\color{blue}\verb+complete.cases()+} 函数。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> cc <- complete.cases(thuesen)
> thuesen02 <- thuesen[cc,]
\end{lstlisting}

\item 解答二：使用 \,{\color{blue}\verb+options()+} 函数，指定参数 \,{\color{blue}\verb+na.action+} 的值。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> options(na.action=na.exclude)
> lm02 <- lm(short.velocity~blood.glucose)
> plot(blood.glucose,short.velocity)
> segments(blood.glucose,fitted(lm02),blood.glucose,short.velocity)
> abline(lm02)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.2.e. 画出残差图和qq图 }

\begin{itemize}
\item  {\color{red}问题：画出上述线性回归模型的残差图（横坐标为拟合值）。}

\item 解答：使用 \,{\color{blue}\texttt{resid()}} 函数提取残差值。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> plot(fitted(lm02),resid(lm02))
\end{lstlisting}

\item  {\color{red}画出残差的qq图，以检验残差是否符合正态分布。}

\item 解答：使用 \,{\color{blue}\texttt{qqnorm()}} 函数。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> qqnorm(resid(lm02))
\end{lstlisting}

\item  {\color{red}问题：解释 qq 图的检验原理。}

\item 解答：纵坐标为从小到大排序后的观测数据。设共有 $n$ 个观测数据，横坐标为标准正态分布的相应分位数，例如 $\frac{i-0.5}{n}$ 分位数（$i=1,\cdots,n$）。若这些点大致排成一条直线，则推断观测数据服从正态分布。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.2.f. 残差图与 qq 图}

 \begin{figure}
 \centering
 \includegraphics[height=0.6\textheight, width=0.9\textwidth]{short-velocity-cct-qqt.png}
 \caption{(1) fitted.values - residuals plot, (2) quantile - quantile plot. }
 \end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.3.a. 预测带与置信带 Prediction and confidence bands }

\begin{itemize}
\item  {\color{red}问题：什么是置信带？}

\item 解答：置信带比较窄，反映了回归直线的不确定性。如果观测数量很多，置信带会变窄，此时回归直线比较准确。
%The narrow bands, confidence bands, reflect the uncertainty about the line itself, like the SEM expresses the precision with which a mean is known. If there are many observations, the bands will be quite narrow, reflecting a well-determined line.

\item  {\color{red}问题：什么是预测带？}

\item 解答：预测带比较宽，反映了未来观测值的预测区间。
%The wide bands, prediction bands, include the uncertainty about future observations.

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.3.b. 画出预测带和置信带（代码-1）}

\begin{itemize}
\item  {\color{red}问题：对上一节的线性回归模型，画出预测带和置信带。}

\item 解答：使用  \,{\color{blue}\verb+predict()+} 函数。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR)
> # attach(thuesen)
> lm01 <- lm(short.velocity~blood.glucose)
> pred.frame <- data.frame(blood.glucose=4:20)
> pp <- predict(lm01,int='p',newdata=pred.frame)
> pc <- predict(lm01,int='c',newdata=pred.frame)
\end{lstlisting}

\item 注释：载入本课程的程序包；\\ 
载入 \,{\color{blue}\verb+thuesen+} 数据框；\\
建立线性回归模型；\\
按自变量的观测范围，定义自变量的等差取值；\\
根据回归模型，对给定的自变量取值，计算预测值的预测区间；\\
计算回归线的置信区间。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.3.c. 画出预测带和置信带（代码-2）}

\begin{itemize}
\item  {\color{red}问题：对上一节的线性回归模型，画出预测带和置信带。}

\item 解答：使用  \,{\color{blue}\verb+matlines()+} 函数可以同时画出几条曲线。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> ylima <- range(short.velocity,pp,na.rm=T)
> ylimb <- c(ylima[1]*0.6,ylima[2]*1.2)
> plot(blood.glucose,short.velocity, ylim=ylimb)
> pred.gluc <- pred.frame$blood.glucose
> matlines(pred.gluc,pc,lty=c(1,2,2),col='black')
> matlines(pred.gluc,pp,lty=c(1,2,2),col='red')
\end{lstlisting}

\item 注释：画出观测数据的散点图；\\
取出自变量的等差取值；\\
画出置信带，设置线型和颜色，注意 \,{\color{blue}\verb+pc+} 不止一行数值；\\
画出预测带，设置线型和颜色。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.3.d. 画出预测带和置信带（图像）}

 \begin{figure}
 \centering
 \includegraphics[height=0.6\textheight, width=0.8\textwidth]{short-velocity-predict-2.png}
 \caption{Confidence band and prediction band. }
 \end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.3.e. 置信带和预测带是如何计算的？ }

\begin{itemize}
\item  {\color{red}问题：写出置信带和预测带的计算公式。}

\item 解答：首先记三个数值，
\begin{eqnarray*}
L_{xx} = \sum\limits_{i=1}^{n}(x_i-\bar{x})^2, \hspace{0.2cm}
h_{00} = \frac{1}{n} + \frac{(x_0-\bar{x})^2}{L_{xx}}, \hspace{0.2cm}
\hat\sigma^2 = \frac{1}{n-2} \sum\limits_{i=1}^{n}(y_i-\hat{y}_i)^2.  
\end{eqnarray*}
然后有置信带和预测带的计算公式，
\begin{eqnarray*}
\text{置信带} &:& \hat{y}_0 \pm t_{\alpha/2}(n-2) \sqrt{h_{00}}\hat\sigma, \\ 
\text{预测带} &:& \hat{y}_0 \pm t_{\alpha/2}(n-2) \sqrt{1+h_{00}}\hat\sigma.
\end{eqnarray*}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.4.a. 相关性 Correlation}

\begin{itemize}
\item  {\color{red}问题：什么是相关系数？}

\item 解答：
\begin{enumerate}
\item  A correlation coefficient is a symmetric, scale-invariant measure of association between two random variables. 
\item  It ranges from  $-1$ to $+1$, where the extremes indicate perfect correlation and 0 means no correlation. 
\item  The sign is negative when large values of one variable are associated with small values of the other. 
\end{enumerate}

%\item[2.] 问题：解释三种相关系数的差别？
%\item 解答：

\item  定义：随机变量 $X$ 与 $Y$ 的相关系数定义为 
{\footnotesize 
$$\rho (X,Y) = \frac{\mathbb{E}(XY) - \mathbb{E}(X)\mathbb{E}(Y)}{\sqrt{Var(X)}\sqrt{Var(Y)}}
= \frac{cov(X,Y)}{\sqrt{cov(X,X)}\sqrt{cov(Y,Y)}}
.$$
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.4.b. }

\begin{itemize}

\item  {\color{red}问题：写出皮尔逊相关系数的计算公式。}

\item 解答：这是两组数据的皮尔逊相关系数：
{\footnotesize 
\[ r = \frac{\sum (x_i-\bar{x})(y_i-\bar{y})}{\sqrt{\sum(x_i-\bar{x})^2}\sqrt{\sum(y_i-\bar{y})^2}}. \]
}

\item  {\color{red}问题：皮尔逊相关系数与两个随机变量之间的相关系数的联系是什么？}

\item 解答：一个是数据的计算，一个是理论的公式。两个随机变量的协方差
{\footnotesize 
$$cov(X,Y) = \mathbb{E}[(X-\mathbb{E}(X))(Y-\mathbb{E}(Y))] = \mathbb{E}(XY) - \mathbb{E}(X)\mathbb{E}(Y). $$
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.4.1.a. 皮尔逊相关系数 Pearson correlation}

\begin{itemize}
\item  {\color{red}问题：皮尔逊相关系数与二维正态分布的关系是什么？}

\item  解答：二维正态分布的联合概率密度函数
{\footnotesize 
\begin{eqnarray*}
f(\vec{x}) = \frac{1}{\sqrt{(2\pi)^2|V|}} \exp\left\{ - \frac{(\vec{x}-\vec{\mu} )^TV^{-1}(\vec{x}-\vec{\mu} )}{2} \right\},
\end{eqnarray*}
}
其中 $\vec{\mu}$ 是随机向量 $\vec{X}$ 的均值向量，$V$ 是 $\vec{X}$ 的协方差矩阵：
{\footnotesize 
\begin{eqnarray*}
V = \begin{bmatrix} cov(X,X) & cov(X,Y) \\ cov(Y,X) & cov(Y,Y) \end{bmatrix}
= \begin{bmatrix} \sigma_X^2 & \rho\sigma_X\sigma_Y \\ \rho\sigma_X\sigma_Y & \sigma_Y^2 \end{bmatrix}. 
\end{eqnarray*}
}

%\item 等高线的椭圆形状越扁，则两个边际分布之间的皮尔逊相关系数越接近 $1$ 或者$-1$.
\item  参考文献1
\item  参考文献2

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.4.1.b. }

\begin{itemize}

\item  {\color{red}问题：如何生成服从二维正态分布的一些随机数？}

\item 解答：先在水平方向上生成一维正态分布的随机数，然后对每个随机数，在竖直方向上生成一个正态分布的随机数。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
rho <- -0.60
n <- 300
x <- rnorm(n)
y <- rnorm(n,rho*x,sqrt(1-rho^2))
plot(x,y,pch=23, col='blue')
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.4.1.c. }

 \begin{figure}
 \centering
 \includegraphics[height=0.7\textheight, width=0.9\textwidth]{plot-6-4-1-1.png}
% \caption{Two dimensional normal distribution. }
 \end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.4.1.d. 皮尔逊相关系数的计算与检验 }

\begin{itemize}
\item  {\color{red}问题：计算 \verb+thuesen+ 数据框中的 \verb+blood.glucose+ 和 \verb+short.velocity+ 数据之间的皮尔逊相关系数。}

\item 解答：第一种是输入两个数据向量作为参数；第二种是直接输入数据框作为参数。注意对缺失数据的处理。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> cor(blood.glucose,short.velocity,use='complete.obs')
> cor(thuesen,use='complete.obs')
\end{lstlisting}

\item  {\color{red}问题：检验这两组数据之间的皮尔逊相关系数是否显著不等于零。}

\item 解答：使用 \,{\color{blue}\verb+cor.test()+} 函数。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> cor.test(blood.glucose,short.velocity)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.4.2.a. 斯皮尔曼相关系数 Spearman's $\rho$}

\begin{itemize}
\item  {\color{red}问题：解释斯皮尔曼相关系数的计算公式。}

\item 解答：将两组数据排序，将每对数据的排名的差记为 $d_i$, 则斯皮尔曼等级相关系数定义为 
{\footnotesize 
\[ r_s = 1- \frac{6}{n(n^2-1)} \sum\limits_{i=1}^{n} d_i^2. \]
}

\item  {\color{red}问题：检验这两组数据之间的斯皮尔曼相关系数是否显著不等于零。}

\item 解答：使用 \,{\color{blue}\verb+cor.test()+} 函数，并指定参数 \,{\color{blue}\verb+method+} 的值。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> cor.test(blood.glucose,short.velocity,method='spearman')
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.4.2.b. 计算 Spearman 相关系数的一个例子}

\begin{itemize}
\item  {\color{red}问题：设两位裁判给7位运动员的打分如下，问打分是否一致？}
{\footnotesize 
\begin{center}
\begin{tabular}{|c|ccccccc|} \hline 
运动员 & A & B & C & D & E & F & G  \\ \hline 
裁判甲 & $3$ & $5$ & $7$ & $4$ & $2$ & $1$ & $6$ \\ \hline
裁判乙 & $2$ & $4$ & $5$ & $6$ & $3$ & $1$ & $7$ \\ \hline
 $d$ = 名次的差 & $1$ & $1$ & $2$ & $-2$ & $-1$ & $0$ & $-1$ \\ \hline 
\end{tabular}
\end{center}
}

\item 解答：按照 Spearman 等级相关系数的定义，结果是比较一致。
{\footnotesize 
\begin{eqnarray*}
 r_s & =&  1-\frac{6}{n(n^2-1)}\sum\limits_{i=1}^{n} (d_i)^2 \\
 &=& 1-\frac{6\times(1+1+4+4+1+0+1)}{(7)\times (7^2-1)} 
 = 0.7857. 
\end{eqnarray*}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.4.3. 肯德尔相关系数 Kendall's $\tau$}

\begin{itemize}
\item  {\color{red}问题：解释肯德尔相关系数的计算公式。}

\item 解答：设有样本数据 $(x_1,x_2,\cdots,x_n)$ 与 $(y_1,y_2,\cdots,y_n)$. 如果当 $x_i<x_j$ 时也有 $y_i<y_j$, 则称数据 $x$ 与数据 $y$ 在 $(i,j)$ 这一对观测上是一致的。否则就称是不一致的。肯德尔相关系数是大小一致的对数与大小不一致的对数的所占比例的差。理论公式为
{\footnotesize 
$$ \tau = \mathbb{P}[(X_i-X_j)(Y_i-Y_j)>0] - \mathbb{P}[(X_i-X_j)(Y_i-Y_j)<0]. $$
}

%The Kendall $\tau$ is based on counting the number of concordant and discordant pairs. A pair of points is concordant if the difference in the $x$-coordinate is of the same sign as the difference in the $y$-coordinate. 


\item  {\color{red}问题：检验 \verb+thuesen+ 数据的肯德尔相关系数是否显著不等于零。}

\item 解答：使用 \,{\color{blue}\verb+cor.test()+} 函数，并指定参数 \,{\color{blue}\verb+method+} 的值。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> cor.test(blood.glucose,short.velocity,method='kendall')
\end{lstlisting}

%s=0
%for (i in 1:22)
%    for (j in (i+1):23)
%    s=s+sign((x[i]-x[j])*(y[i]-y[j]))

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.5.1. Exercise. }

\begin{itemize}

\item %6.1 
With the \,{\color{blue}\texttt{rmr}} data set, plot metabolic rate versus body weight. Fit a linear regression model to the relation. According to the fitted model, what is the predicted metabolic rate for a body weight of 70 kg?  Give a 95\% confidence interval for the slope of the line.

\item  What is the predicted metabolic rate for a body weight of 70 kg? 
\begin{enumerate}[(a)]
\item  1305. 
\item  1405. 
\item  1505. 
\item  1605. 
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.5.1. Exercise - Answer. }

\begin{itemize}

\item %6.1 
(a). 
The following gives both elementary and more general answers. Notice the use of \,{\color{blue}\texttt{confint}}.

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
# library(ISwR)
> plot(rmr)
> fit <- lm(metabolic.rate ~ body.weight, data=rmr)
> summary(fit)
> 811.2267 + 7.0595 * 70
> predict(fit, newdata=data.frame(body.weight=70))

> qt(.975,42)
> 7.0595 + c(-1,1) * 2.018 * 0.9776
> confint(fit)
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.5.2. Exercise. }

\begin{itemize}

\item %6.2 
In the \,{\color{blue}\texttt{juul}} data set, fit a linear regression model for the square root of the IGF-I concentration versus age to the group of subjects over 25 years old.

\item  What is the slope of the regression line?
\begin{enumerate}[(a)]
\item  $-0.1053$.
\item  $-0.2053$.
\item  $-0.3053$.
\item  $-0.4053$.
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.5.2. Exercise - Answer. }

\begin{itemize}

\item %6.2 
(a). 使用模型公式 \,{\color{blue}\texttt{sqrt(igf1) $\sim$ age}}, 使用 \,{\color{blue}\texttt{subset}} 参数指定研究范围。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
# library(ISwR)
summary(lm(sqrt(igf1)~age,data=juul,subset=age>25))
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.5.3. Exercise. }

\begin{itemize}

\item %6.3 
In the \,{\color{blue}\texttt{malaria}} data set, analyze the log-transformed antibody level versus age. Make a plot of the relation. Do you notice anything peculiar?

\item  What is the residual standard error?
\begin{enumerate}[(a)]
\item  $0.5$. 
\item  $1.5$. 
\item  $2.5$. 
\item  $3.5$. 
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.5.3. Exercise - Answer. }

\begin{itemize}

\item %6.3 
(b). We can fit a linear model and plot the data as follows:

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
# library(ISwR)
summary(lm(log(ab)~age, data=malaria))
plot(log(ab)~age, data=malaria)
\end{lstlisting}

\item  
The plot appears to show a cyclic pattern. It is unclear whether it reflects a significant departure from the model, though. Malaria is a disease with epidemic behaviour, so cycles are plausible.


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.5.4. Exercise. }

\begin{itemize}

\item  %6.4 
One can generate simulated data from the two-dimensional normal distribution with a correlation of $\rho$ by the following technique: 
\begin{enumerate}[(a)]
\item   Generate $X$ as a normal variate with mean 0 and standard deviation 1; 
\item   generate $Y$ with mean $\rho X$  and standard deviation $\sqrt{1-\rho^2}$. 
\end{enumerate}

Use this to create scatterplots of simulated data with a given correlation. Compute the Spearman and Kendall statistics for some of these data sets.


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.5.4. Exercise - Answer. }

\begin{itemize}

\item %6.4 
This could be elaborated by wrapping the random number generation in a function, etc.

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
rho <- .90
n <- 100
x <- rnorm(n)
y <- rnorm(n, rho * x, sqrt(1 - rho^2))
plot(x, y)
cor.test(x, y)
cor.test(x, y, method="spearman")
cor.test(x, y, method="kendall")
\end{lstlisting}

You will most likely find that the Kendall correlation is somewhat smaller than the two others.

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.1. 单项选择题 }

\begin{itemize}

\item %1：简单线性回归模型的基本知识

关于一元线性回归模型 $$y_i=\alpha+\beta x_i+\varepsilon_i, \, (1\le i\le n),$$ 
下述哪个说法不正确？
\begin{enumerate}[(a)]
\item  误差项 $\varepsilon_1,\cdots,\varepsilon_n$ 相互独立，且均值为零，方差相等。
\item  误差项的方差 $\sigma^2$ 的无偏估计量为 $\hat{\sigma}^2=\text{SSE}/n$, 
其中残差平方和 $\text{SSE}=\sum\limits_{i=1}^n e_i^2.$ 
\item  参数 $\alpha$ 的最小二乘估计量为 $\hat{\alpha}=\bar{y}-\hat{\beta}\bar{x}$.
\item  参数 $\beta$ 的最小二乘估计量为 $\hat{\beta}=\left[\sum\limits_{i=1}^n (x_i-\bar{x})(y_i-\bar{y})\right] \left[\sum\limits_{i=1}^n (x_i-\bar{x})^2\right]^{-1}$. 
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.1. 单项选择题 }

\begin{itemize}

\item %1：
解答：(b).
误差项的方差无偏估计量是 $\hat{\sigma}^2=\text{SSE}/(n-2)$. 


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.2. 单项选择题 }

\begin{itemize}

\item %2：系数的估计
为估计一元线性回归模型 $y=\alpha+\beta x+\varepsilon$ 的参数，使用目标函数 $$Q(\alpha,\beta)=\sum\limits_{i=1}^n (y_i-\alpha-\beta x_i)^2,$$ 
并求参数 $\alpha,\beta$ 使得 $Q$ 取最小。这种参数估计方法的名称是什么？ 
\begin{enumerate}[(a)]
\item  矩估计法。
\item  极大似然估计法。
\item  最小二乘法。
\item  期望最大化方法。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.2. 单项选择题 }

\begin{itemize}

\item %2：
解答：(c).
毫无疑问，这是最小二乘法。


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.3. 单项选择题 }

\begin{itemize}

\item %3：系数是否为零的t检验
考虑一元线性回归模型 $y_i=\alpha+\beta x_i+\varepsilon_i, \, (1\le i\le n)$, 关于参数的检验，下述哪个说法不正确？
\begin{enumerate}[(a)]
\item  检验参数 $\beta$ 是否显著不等于零，就是检验变量 $x$ 是否对 $y$ 有显著的线性影响。
\item  在误差项是正态分布的假设下，参数估计量 $\hat{\beta}$ 服从 $N(\beta,\sigma^2/L_{xx})$ 分布。
\item  变量 $x$ 的离差平方和为 $L_{xx}=\sum\limits_{i=1}^n (x_i-\bar{x})^2$. 
\item  在 $H_0: \beta=0$ 为真的假设下，统计量 $t=\frac{\hat{\beta}}{\text{SE}(\hat{\beta})}$ 服从自由度为 $n-2$ 的 $t$ 分布，其中 $\text{SE}(\hat{\beta})=\sigma\sqrt{L_{xx}}$. 
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.3. 单项选择题 }

\begin{itemize}

\item %3：
解答：(d).
检验统计量的分母应该是 $\text{SE}(\hat{\beta})=\hat{\sigma}/\sqrt{L_{xx}}$: 标准误应当除以 $\sqrt{L_{xx}}$ 而不是乘以它；并且其中的误差项 $\varepsilon$ 的标准差 $\sigma$ 是未知的，要使用估计值 $\hat\sigma$. 



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.6.4. 单项选择题 }

\begin{itemize}

\item %4：模型方程
下述代码载入程序包 \,{\color{blue}\texttt{ISwR}}, 并研究数据框 \,{\color{blue}\texttt{thuesen}}. 

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR)  #1
> thuesen  #2
> mydata <- thuesen[complete.cases(thuesen),]  #3
> colnames(mydata) <- c('x','y')  #4
> mydata  #5
> lm01 <- lm(y~x,data=mydata)  #6
> lm02 <- lm(y~x-1,data=mydata)  #7
> lm03 <- lm(y~.,data=mydata)  #8
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.6.4. 单项选择题 }

\begin{itemize}

\item %4：模型方程
下述说法中，哪个是不正确的？
\begin{enumerate}[(a)]
\item  数据框 \,{\color{blue}\verb+mydata+} 共有 24 行观测数据。
\item  回归模型 \,{\color{blue}\verb+lm01+} 是不过原点的线性回归。
\item  回归模型 \,{\color{blue}\verb+lm02+} 是过原点的线性回归。
\item  回归模型 \,{\color{blue}\verb+lm03+} 是对除了 \,{\color{blue}\verb+y+} 之外的所有变量进行的线性回归，包括截距项。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.6.4. 单项选择题 }

\begin{itemize}

\item %4：
解答：(a).
数据框 \,{\color{blue}\verb+mydata+} 共有 23 行观测数据。



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.6.5. 单项选择题 }

\begin{itemize}

\item %5：残差值与回归值
研究数据框 thuesen, 运行下述代码。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR)  #1
> mydata <- thuesen[complete.cases(thuesen),]  #2
> colnames(mydata) <- c('x','y')  #3
> options(digits=4)  #4
> lm01 <- lm(y~x,data=mydata)  #5
> yhat <- fitted(lm01)  #6
> e <- resid(lm01)  #7

> options(digits=2)  #8
> e  #9
> em <- matrix(e)  #10
> em  #11
> class(em)  #12
> dim(em)  #13
> qqnorm(e)  #14
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.5. 单项选择题 }

\begin{itemize}

\item %5：残差值与回归值
下述说法中，哪个是不正确的？
\begin{enumerate}[(a)]
\item  第6行程序从回归模型里提取出因变量的回归值。
\item  第7行程序从回归模型里提取出残差值。
\item  第13行程序得到这个矩阵的维数是1行23列的。
\item  第14行程序检验残差是否服从正态分布。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.5. 单项选择题 }

\begin{itemize}

\item %5：
解答：(c).
第13行程序得到这个矩阵的维数是23行1列的。



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.6.6. 单项选择题 }

\begin{itemize}

\item %6：lm函数的输入与输出的解读
研究数据框 thuesen, 运行下述代码。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR)  #1
> mydata <- thuesen[complete.cases(thuesen),]  #2
> colnames(mydata) <- c('x','y')  #3
> options(digits=2)  #4
> lm01 <- lm(y~x,data=mydata)  #5
> yhat <- fitted(lm01)  #6
> x <- mydata$x  #7
> y <- mydata$y  #8
> plot(x,y,ylim=c(0,3))  #9
> abline(lm01)  #10
> points(x,yhat,col='red')  #11
> segments(x,y,x,yhat,lwd=3,col='blue')  #12
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.6. 单项选择题 }

\begin{itemize}

\item %6：lm函数的输入与输出的解读
下述说法中，哪个是不正确的？
\begin{enumerate}[(a)]
\item  第9行程序画出了数据的散点图。
\item  第10行程序画出了数据的坐标轴。
\item  第11行程序画出了回归点。
\item  第12行程序画出了残差线段。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.6. 单项选择题 }

\begin{itemize}

\item %6：
解答：(b).
第10行程序画出了回归直线。



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.6.7. 单项选择题 }

\begin{itemize}

\item %7：置信带与预测带的概念计算与绘图
载入程序包 ISwR 中的数据框 rmr, 选取体重为自变量，代谢率为因变量。
画出散点图和回归直线。设置信水平为 95\%. 

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR)
> mydata <- rmr  
> head(mydata)
> colnames(mydata) <- c('x','y')
> head(mydata)
> lm02 <- lm(y~x,data=mydata)
> plot(mydata)
> abline(lm02)
> summary(lm02)
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.7. 单项选择题 }

\begin{itemize}

\item %7：置信带与预测带的概念计算与绘图
计算回归直线的斜率的置信区间，选出正确的答案。
\begin{enumerate}[(a)]
\item  $[3.09, 9.03]$. 
\item  $[4.09, 8.03]$. 
\item  $[5.09, 9.03]$. 
\item  $[6.09, 8.03]$. 
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.6.7. 单项选择题 }

\begin{itemize}

\item %7：
解答：(c).
运行程序即得。按照定义计算如下，
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> betahat <- 7.060
> sebh <- 0.978  # sebh = standard error of beta hat 
> df <- 42
> betahat - qt(0.975,df)*sebh
> betahat + qt(0.975,df)*sebh
\end{lstlisting}

从lm函数的结果中提取置信区间的方法如下，
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> confint(lm02)
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.8. 单项选择题 }

\begin{itemize}

\item %8：利用回归方程进行预测
载入程序包 ISwR 中的数据框 rmr, 选取体重为自变量，代谢率为因变量。
预测60公斤的体重，对应的代谢率是多少？
\begin{enumerate}[(a)]
\item  1235.
\item  1335.
\item  1435.
\item  1525.
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.6.8. 单项选择题 }

\begin{itemize}

\item %8：
解答：(a). 根据 lm 函数的结果，使用 predict 函数，

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR) 
> mydata <- rmr  
> colnames(mydata) <- c('x','y')
> lm02 <- lm(y~x,data=mydata)
> predict(lm02,newdata=data.frame(x=60))
\end{lstlisting}

或者代入回归方程直接计算，
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> 811.227+7.06*60
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.9. 单项选择题 }

\begin{itemize}

\item %9：Pearson相关系数
研究数据框 rmr, 计算体重和代谢率的皮尔逊相关系数。
\begin{enumerate}[(a)]
\item  0.7397.
\item  0.5543.
\item  0.7442.
\item  0.5397.
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.6.9. 单项选择题 }

\begin{itemize}

\item %9：
解答：(c). 使用 cor 函数或 cor.test 函数，

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR) 
> mydata <- rmr
> colnames(mydata) <- c('x','y')
> cor(mydata$x,mydata$y)
> cor(mydata$x,mydata$y,method='pearson')
> cor.test(mydata$x,mydata$y,method='pearson')
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{6.6.10. 单项选择题 }

\begin{itemize}

\item %10：二维正态随机数的模拟与Pearson相关系数
下述程序模拟生成二维正态分布的随机数，并画出散点图。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> rho <- c(-0.8,-0.2,0.6,0.9)  #1
> par(mfrow=c(2,2))  #2
> for (k in 1:4)  #3
+ {  #4
+   x <- rnorm(100)  #5
+   y <- rnorm(100,rho[k]*x,sqrt(1-rho[k]^2))  #6
+   plot(x,y)  #7
+ }  #8
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.10. 单项选择题 }

\begin{itemize}

\item %10：二维正态随机数的模拟与Pearson相关系数
找出皮尔逊相关系数最大的图。
\begin{enumerate}[(a)]
\item  第一个图。
\item  第二个图。
\item  第三个图。
\item  第四个图。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.6.10. 单项选择题 }

\begin{itemize}

\item %10：
解答：(d).
看图可知，第四个图最接近直线，且斜率为正。

%\item  这个二维的随机数服从什么分布？


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.7.1. 简答题（参考茆诗松书本8.4节） }

\begin{itemize}

\item 
设有自变量 $x$ 和因变量 $y$ 的观测数据 $x_1,\cdots, x_n$ 和 $y_1,\cdots, y_n$. 在知道 $x$ 的值后，$y$ 的值是一个随机变量 $Y$, 其均值写成 $x$ 的函数为 
\[ f(x)=E(Y|x)=\int_{-\infty}^{\infty} yp(y|x)dy. \] 
这称为 $y$ 关于 $x$ 的回归函数。回归方程的一般形式是 $y=f(x)+\varepsilon$. 
\begin{enumerate}
\item 一元线性回归模型的形式如下，该模型的参数是哪些？如何估计参数？

\vspace{-0.4cm}
 \[ y_i=\alpha+\beta x_i+\varepsilon_i, \,\,\, \varepsilon_i\sim N(0,\sigma^2). \]
\vspace{-0.4cm}

\item 如果 $\beta$ 的真实值不为零，则称该模型是显著的。如何检验假设 

\vspace{-0.4cm}
\[ H_0: \beta=0, \,\, \mathrm{v.s.} \,\,  H_1: \beta\neq 0 \,\,? \] 
\vspace{-0.4cm}

\item 写出统计量 $R^2$ 和 $R^2_{\mathrm{adj}}$ 的计算公式。它们的作用是什么？
\item 写出预测带和置信带的定义，解释它们的差别。
\end{enumerate}



\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.7.2. 简答题（参考茆诗松书本第8.4节） }


\begin{itemize}


\item  
由专业知识知道，合金的强度 $y$ （单位：$10^7$Pa）与合金中碳的含量 $x$ （单位：\%）有关。
现收集到下述数据，

\begin{table}[ht] \scriptsize\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|c|c|c|c|c|}\hline
序号 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 11 & 12 \\
\hline
$x$  &0.10 &0.11 &0.12 &0.13 &0.14 &0.15& 0.16 & 0.17 & 0.18 & 0.20 & 0.21 & 0.23  \\
\hline
$y$  &42.0 &43.0 &45.0 &45.0 &45.0 &47.5 &49.0 &53.0 &50.0 &55.0 &55.0 &60.0  \\ \hline
\end{tabular}
\end{table}

\begin{enumerate}
\item  将数据存入一个数据框。画出散点图，求出合金强度与碳含量的线性回归方程。
\item  计算上述回归方程的方差分析表。判断回归方程是否显著。
\item  画出预测带和置信带。现设碳含量为 $x_0=0.16$, 求合金强度 $y_0$ 的0.95预测区间和 $E(y_0)$ 的0.95置信区间。
\end{enumerate}



\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.7.3. 简答题（参考茆诗松书本例8.4.5） }


\begin{itemize}

\item 
观测某种动物18只，测得其质量 $x$ 与体积 $y$ 的数据。

\begin{table}[ht] \scriptsize\centering
\begin{tabular}{|cc|cc|cc|cc|cc|cc|}\hline
 $x$ & $y$ & $x$ & $y$ & $x$ & $y$ & $x$ & $y$ & $x$ & $y$ & $x$ & $y$ \\ \hline
10.4 & 10.2 & 12.1 & 11.9 & 15.1 & 14.8 & 15.7 & 15.7 & 16.5 & 15.9 & 17.1 & 16.7 \\
10.5 & 10.4 & 13.8 & 13.5 & 15.1 & 15.1 & 15.8 & 15.2 & 16.7 & 16.6 & 17.8 & 17.6 \\
11.9 & 11.6 & 15.0 & 14.5 & 15.1 & 14.5 & 16.0 & 15.8 & 17.1 & 16.7 & 18.4 & 18.3 \\
\hline
\end{tabular}
\end{table}

\begin{enumerate}
\item 计算这种动物的体积关于质量的回归方程。
\item 计算方差分析表，判断回归方程的显著性。
\item 如果测得某只这种动物的质量为 17.6 公斤，估计其体积。
\end{enumerate}


\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.7.4. 简答题 }


\begin{itemize}


\item 设有两组数据 $x_1,\cdots, x_n$ 和 $y_1,\cdots, y_n$. 分别写出 Pearson 相关系数、Spearman 相关系数和 Kendall 相关系数的计算公式。



\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.7.5. 简答题 }


\begin{itemize}


\item  研究 ISwR 程序包里的 thuesen 数据框。
\begin{enumerate}
\item  使用 lm() 函数进行线性回归分析。解释 lm() 函数的输出结果。
\item  使用 lm() 函数后得到一个模型对象，对其使用 summary() 函数后，解释输出的结果。内容包括模型公式、残差、参数估计和检验、标准误的估计、R方和调整的R方、模型的检验和p值。
\item  计算回归值，画出散点图和回归线。使用 QQ 图来检验残差是否服从正态分布。
\item  画出预测带和置信带。
\end{enumerate}


\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.7.6. 简答题 }


\begin{itemize}


\item  什么是两个随机变量之间的相关性？研究 ISwR 程序包里的 thuesen 数据框。
\begin{enumerate}
\item  设有两组数据 $x_1,\cdots, x_n$ 和 $y_1,\cdots, y_n$. 分别写出 Pearson 相关系数、Spearman 相关系数和 Kendall 相关系数的计算公式。
\item  使用 cor() 函数计算 thuesen 数据框里的两列数据的 Pearson 相关系数。
\item  使用 cor.test() 函数检验这两列数据的 Pearson 相关系数是否显著不等于零。
\item  使用 cor.test() 函数检验这两列数据的 Spearman 相关系数是否显著不等于零。
\item  使用 cor.test() 函数检验这两列数据的 Kendall 相关系数是否显著不等于零。
\end{enumerate}


\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.7.7. 简答题 }

\begin{itemize}


\item  研究 ISwR 程序包里的 rmr 数据框。
With the rmr data set, plot metabolic rate versus body weight. Fit a linear regression model to the relation. According to the fitted model, what is the predicted metabolic rate for a body weight of 70 kg? Give a 95\% confidence interval for the slope of the line.

\begin{enumerate}
\item  画出代谢率关于体重的散点图。求出一个线性回归模型。
\item  根据这个模型，体重70公斤的代谢率是多少？
\item  计算这条回归线的斜率的 95\% 的置信区间。
\end{enumerate}



\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.7.8. 简答题 }


\begin{itemize}



\item  研究 ISwR 程序包里的 juul 数据框。
In the juul data set, fit a linear regression model for the square root of the IGF-I concentration versus age to the group of subjects over 25 years old.

\begin{enumerate}
\item  找出25岁以上的人群，另存为一个数据框。
\item  以年龄为自变量，IGF-I 生长因子的平方根为因变量，求出一个线性回归模型。
\end{enumerate}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.7.9. 简答题 }


\begin{itemize}


\item  研究 ISwR 程序包里的 malaria 数据框。
以年龄为自变量，变量 antibody level 的对数为因变量，求出一个线性回归模型。
In the malaria data set, analyze the log-transformed antibody level versus age. Make a plot of the relation. Do you notice anything peculiar?



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{6.7.10. 简答题 }


\begin{itemize}


\item  按照下述方法生成服从二维正态分布的随机数。

\begin{enumerate}
\item  生成标准正态分布的随机数 $x_i$.
\item  固定参数 $\rho$, 生成正态分布 $N(\rho x_i, 1-\rho^2)$ 的随机数 $y_i$. 
\item  画出 $\{ (x_i,y_i)\mid i=1,\cdots,n \}$ 的散点图。
\item  计算数据 $(x_1,\cdots,x_n)$ 和数据 $(y_1,\cdots,y_n)$ 的 Spearman 相关系数和 Kendall 等级相关系数。 
\item  如何检验这些数据是否服从二维正态分布？
\end{enumerate}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}
